# Rendering defaults applied to every chunk while the codebook is generated.
knitr::opts_chunk$set(
  echo = TRUE, # print the R source alongside its output
  message = TRUE, # keep messages visible in the rendered codebook
  warning = TRUE, # keep warnings visible in the rendered codebook
  # Carry on past errors instead of aborting codebook generation; this is
  # usually the more convenient behaviour when debugging.
  error = TRUE
)

# Make theme_bw() the active ggplot2 theme.
ggplot2::theme_set(ggplot2::theme_bw())
library(rio)
library(labelled)
library(codebook)
##
## Attaching package: 'codebook'
## The following object is masked from 'package:labelled':
##
## to_factor
# Read the processed trial-level data (gzip-compressed CSV).
codebook_data <- import(
  file = "../data_processing/output_data/trial_data/sr_trial_data.csv.gz"
)

# Scaffold helper, left disabled: prints the column names joined by
# " = '', " so they can be pasted in as an empty template for the list below.
# cat(paste(names(codebook_data), collapse = " = '', \n"))

# Attach a descriptive label to each column; list names are the column names.
var_label(codebook_data) <- list(
  # Participant and trial ordering
  observation = "Unique participant ID number.",
  fix_sender = "The sender_id column in a sortable format. You can sort the data by observation and this column to ensure it is in trial order.",

  # Response information
  response = "Participant response to the trial.",
  response_action = "Keypress used to indicate their response to the trial.",
  ended_on = "How the trial ended (timeout, form submit, completion, response).",
  duration = "The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.",

  # Timing columns
  time_run = "The time in milliseconds from the start of the experiment it took to run (start to display) the trial.",
  time_render = "The time in milliseconds from the start of the experiment it took to render (prepare, get ready for) the trial.",
  time_show = "The time in milliseconds from the start of the experiment it took to show the trial on the screen to the participant.",
  time_end = "The time in milliseconds from the start of the experiment it took to end the current trial.",
  time_commit = "The time in milliseconds from the start of the experiment it took to save the current trial.",
  timestamp = "The approximate timestamp of the trial in UTC server time.",
  time_switch = "The time in milliseconds from the start of the experiment it took to switch between the previous trial and the current trial.",

  # Stimulus and scoring
  word = "The string of letters/characters shown on the screen for the trial.",
  class = "The type of stimuli shown on the screen (word or nonword).",
  correct_response = "The correct answer for the trial.",
  correct = "A logical variable indicating if the participant got the trial answer correct.",
  original_duration = "The duration in milliseconds of the entire trial from time shown to time end.",
  Z_RT = "The Z-scored response latency (by participant) of the duration column.",

  # Exclusion flags
  keep = "If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).",
  keep_participant = "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.",
  keep_participant_answered = "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED."
)
# Dataset-level metadata. Each field is stored on codebook_data through
# codebook's metadata() accessor; the rendered report repeats these values in
# its header and in the JSON-LD block at the end.
metadata(codebook_data)$name <- "Semantic Priming Across Many Languages Trial Level Data"
# NOTE: the description literal spans two source lines, so the string itself
# contains a line break between its two paragraphs.
metadata(codebook_data)$description <- "This dataset contains the processed trial data of the data collection from the SPAML project (example in Serbian, but all datasets are the same). The data is presented here in long format, with each trial representing one row in the data. All other trials (consent/demographic screens), fillers, fixation crosses, etc. have been excluded in this version.
Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages."
# Persistent identifier (DOI) of the data release; the same DOI closes the
# citation string below.
metadata(codebook_data)$identifier <- "https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$creator <- "Erin M. Buchanan"
metadata(codebook_data)$citation <- "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$url <- "https://github.com/SemanticPriming/SPAML/releases/"
# Publication date and coverage fields.
metadata(codebook_data)$datePublished <- "2024-05-01"
metadata(codebook_data)$temporalCoverage <- "2022-2024"
metadata(codebook_data)$spatialCoverage <- "Online"
# Render the codebook for the labelled, annotated data: the dataset summary,
# one section per variable, and the JSON-LD metadata.
codebook(codebook_data)
Dataset name: Semantic Priming Across Many Languages Trial Level Data
This dataset contains the processed trial data of the data collection from the SPAML project (example in Serbian, but all datasets are the same). The data is presented here in long format, with each trial representing one row in the data. All other trials (consent/demographic screens), fillers, fixation crosses, etc. have been excluded in this version.
Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.
Temporal Coverage: 2022-2024
Spatial Coverage: Online
Citation: Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van’t Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833
Identifier: https://doi.org/10.5281/zenodo.10888833
Date published: 2024-05-01
Creator:
| name | value |
|---|---|
| 1 | Erin M. Buchanan |
|
# Variables
Unique participant ID number.
Distribution of values for observation
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| observation | Unique participant ID number. | character | 0 | 1 | 757 | 0 | 14 | 14 | 0 |
The sender_id column in a sortable format. You can sort the data by observation and this column to ensure it is in trial order.
Distribution of values for fix_sender
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| fix_sender | The sender_id column in a sortable format. You can sort the data by observation and this column to ensure it is in trial order. | character | 0 | 1 | 800 | 0 | 11 | 11 | 0 |
Participant response to the trial.
Distribution of values for response
16151 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| response | Participant response to the trial. | character | 16151 | 0.9717089 | 2 | 0 | 4 | 7 | 0 |
Keypress used to indicate their response to the trial.
Distribution of values for response_action
17677 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| response_action | Keypress used to indicate their response to the trial. | character | 17677 | 0.9690359 | 2 | 0 | 11 | 11 | 0 |
How the trial ended (timeout, form submit, completion, response).
Distribution of values for ended_on
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| ended_on | How the trial ended (timeout, form submit, completion, response). | character | 0 | 1 | 2 | 0 | 7 | 8 | 0 |
The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.
Distribution of values for duration
58151 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| duration | The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved. | numeric | 58151 | 0.8981392 | 160 | 755 | 2995 | 887.1916 | 414.2094 | ▇▇▂▁▁ |
The time in milliseconds from the start of the experiment it took to run (start to display) the trial.
Distribution of values for time_run
1700 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time_run | The time in milliseconds from the start of the experiment it took to run (start to display) the trial. | numeric | 1700 | 0.9970222 | 38370 | 851402 | 1.1e+08 | 1108711 | 4058049 | ▇▁▁▁▁ |
The time in milliseconds from the start of the experiment it took to render (prepare, get ready for) the trial.
Distribution of values for time_render
1700 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time_render | The time in milliseconds from the start of the experiment it took to render (prepare, get ready for) the trial. | numeric | 1700 | 0.9970222 | 38369 | 851401 | 1.1e+08 | 1108709 | 4058049 | ▇▁▁▁▁ |
The time in milliseconds from the start of the experiment it took to show the trial on the screen to the participant.
Distribution of values for time_show
4057 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time_show | The time in milliseconds from the start of the experiment it took to show the trial on the screen to the participant. | numeric | 4057 | 0.9928935 | 38383 | 851954 | 1.1e+08 | 1106518 | 4062001 | ▇▁▁▁▁ |
The time in milliseconds from the start of the experiment it took to end the current trial.
Distribution of values for time_end
1700 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time_end | The time in milliseconds from the start of the experiment it took to end the current trial. | numeric | 1700 | 0.9970222 | 39622 | 852268 | 1.1e+08 | 1109721 | 4058186 | ▇▁▁▁▁ |
The time in milliseconds from the start of the experiment it took to save the current trial.
Distribution of values for time_commit
1700 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time_commit | The time in milliseconds from the start of the experiment it took to save the current trial. | numeric | 1700 | 0.9970222 | 39623 | 852270 | 1.1e+08 | 1109723 | 4058186 | ▇▁▁▁▁ |
The approximate timestamp of the trial in UTC server time.
## 568725 unique, categorical values, so not shown.
1700 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | min | median | max |
|---|---|---|---|---|---|---|---|---|
| timestamp | The approximate timestamp of the trial in UTC server time. | POSIXct | 1700 | 0.9970222 | 568725 | 2022-12-20 17:47:51.709 | 2023-10-27 13:21:45.922 | 2024-02-13 22:09:46.042 |
The time in milliseconds from the start of the experiment it took to switch between the previous trial and the current trial.
Distribution of values for time_switch
1943 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time_switch | The time in milliseconds from the start of the experiment it took to switch between the previous trial and the current trial. | numeric | 1943 | 0.9965965 | 39651 | 852243 | 1.1e+08 | 1109572 | 4057532 | ▇▁▁▁▁ |
The string of letters/characters shown on the screen for the trial.
Distribution of values for word
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| word | The string of letters/characters shown on the screen for the trial. | character | 0 | 1 | 3917 | 0 | 2 | 20 | 0 |
The type of stimuli shown on the screen (word or nonword).
Distribution of values for class
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| class | The type of stimuli shown on the screen (word or nonword). | character | 0 | 1 | 2 | 0 | 4 | 7 | 0 |
The correct answer for the trial.
Distribution of values for correct_response
16151 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| correct_response | The correct answer for the trial. | character | 16151 | 0.9717089 | 2 | 0 | 4 | 7 | 0 |
A logical variable indicating if the participant got the trial answer correct.
Distribution of values for correct
17677 missing values.
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| correct | A logical variable indicating if the participant got the trial answer correct. | logical | 17677 | 0.9690359 | TRU: 517470, FAL: 35740 | 0.9353952 |
The duration in milliseconds of the entire trial from time shown to time end.
Distribution of values for original_duration
1700 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| original_duration | The duration in milliseconds of the entire trial from time shown to time end. | numeric | 1700 | 0.9970222 | -14 | 759 | 1620079 | 950.6084 | 3507.883 | ▇▁▁▁▁ |
The Z-scored response latency (by participant) of the duration column.
Distribution of values for Z_RT
58151 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Z_RT | The Z-scored response latency (by participant) of the duration column. | numeric | 58151 | 0.8981392 | -2.9 | -0.26 | 12 | 0 | 0.9992723 | ▇▃▁▁▁ |
If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).
Distribution of values for keep
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| keep | If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered). | character | 0 | 1 | 2 | 0 | 4 | 7 | 0 |
If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.
Distribution of values for keep_participant
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| keep_participant | If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN. | character | 0 | 1 | 2 | 0 | 4 | 7 | 0 |
If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.
Distribution of values for keep_participant_answered
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| keep_participant_answered | If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED. | character | 0 | 1 | 2 | 0 | 4 | 7 | 0 |
The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.
{
"name": "Semantic Priming Across Many Languages Trial Level Data",
"description": "This dataset contains the processed trial data of the data collection from the SPAML project (example in Serbian, but all datasets are the same). The data is presented here in long format, with each trial representing one row in the data. All other trials (consent/demographic screens), fillers, fixation crosses, etc. have been excluded in this version. \n\nSemantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n[truncated]\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.2).",
"identifier": "https://doi.org/10.5281/zenodo.10888833",
"creator": "Erin M. Buchanan",
"citation": "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833",
"url": "https://github.com/SemanticPriming/SPAML/releases/",
"datePublished": "2024-05-01",
"temporalCoverage": "2022-2024",
"spatialCoverage": "Online",
"keywords": ["observation", "fix_sender", "response", "response_action", "ended_on", "duration", "time_run", "time_render", "time_show", "time_end", "time_commit", "timestamp", "time_switch", "word", "class", "correct_response", "correct", "original_duration", "Z_RT", "keep", "keep_participant", "keep_participant_answered"],
"@context": "http://schema.org/",
"@type": "Dataset",
"variableMeasured": [
{
"name": "observation",
"description": "Unique participant ID number.",
"@type": "propertyValue"
},
{
"name": "fix_sender",
"description": "The sender_id column in a sortable format. You can sort the data by observation and this column to ensure it is in trial order.",
"@type": "propertyValue"
},
{
"name": "response",
"description": "Participant response to the trial.",
"@type": "propertyValue"
},
{
"name": "response_action",
"description": "Keypress used to indicate their response to the trial.",
"@type": "propertyValue"
},
{
"name": "ended_on",
"description": "How the trial ended (timeout, form submit, completion, response).",
"@type": "propertyValue"
},
{
"name": "duration",
"description": "The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.",
"@type": "propertyValue"
},
{
"name": "time_run",
"description": "The time in milliseconds from the start of the experiment it took to run (start to display) the trial.",
"@type": "propertyValue"
},
{
"name": "time_render",
"description": "The time in milliseconds from the start of the experiment it took to render (prepare, get ready for) the trial.",
"@type": "propertyValue"
},
{
"name": "time_show",
"description": "The time in milliseconds from the start of the experiment it took to show the trial on the screen to the participant.",
"@type": "propertyValue"
},
{
"name": "time_end",
"description": "The time in milliseconds from the start of the experiment it took to end the current trial.",
"@type": "propertyValue"
},
{
"name": "time_commit",
"description": "The time in milliseconds from the start of the experiment it took to save the current trial.",
"@type": "propertyValue"
},
{
"name": "timestamp",
"description": "The approximate timestamp of the trial in UTC server time.",
"@type": "propertyValue"
},
{
"name": "time_switch",
"description": "The time in milliseconds from the start of the experiment it took to switch between the previous trial and the current trial.",
"@type": "propertyValue"
},
{
"name": "word",
"description": "The string of letters/characters shown on the screen for the trial.",
"@type": "propertyValue"
},
{
"name": "class",
"description": "The type of stimuli shown on the screen (word or nonword).",
"@type": "propertyValue"
},
{
"name": "correct_response",
"description": "The correct answer for the trial.",
"@type": "propertyValue"
},
{
"name": "correct",
"description": "A logical variable indicating if the participant got the trial answer correct.",
"@type": "propertyValue"
},
{
"name": "original_duration",
"description": "The duration in milliseconds of the entire trial from time shown to time end.",
"@type": "propertyValue"
},
{
"name": "Z_RT",
"description": "The Z-scored response latency (by participant) of the duration column.",
"@type": "propertyValue"
},
{
"name": "keep",
"description": "If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).",
"@type": "propertyValue"
},
{
"name": "keep_participant",
"description": "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.",
"@type": "propertyValue"
},
{
"name": "keep_participant_answered",
"description": "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.",
"@type": "propertyValue"
}
]
}